1. Description

This R Markdown document describes the analyses performed for the manuscript entitled “Environmental pollution correlates with parasite infection across a riverine landscape” by Io S. Deflem, Seppe Marchand, Federico C.F. Calboli, Joost A.M. Raeymaekers, Filip A.M. Volckaert and Pascal I. Hablützel.

The analyses were run in R 4.1.2

2. Study area and sampling

Up to thirty 0+ three-spined sticklebacks were sampled at 37 locations in the Dijle and Demer basins in Belgium during autumn 2016 under a permit of the Flemish Agency Nature and Forest (Fig. 1). Both basins together cover a continuous surface area of 3,624 km² with the furthest two sampling sites being located 117 km apart (distance measured along rivers). All locations included small and relatively slow flowing streams (drop off from highest to lowest point is 18 m) covering a wide range of ecological, hydromorphological, and physico-chemical characteristics. Fish were caught using a dip net.

3. Setting up working environment

# Empty environment
rm(list=ls())

# Set working directory to location where script is stored
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # requires installation of package "rstudioapi"

4 Loading and preparing host and parasite data

Fish were euthanized with a lethal dose of MS222 on the day of capture, following directions of the KU Leuven Animal Ethics Commission, and stored at -20 °C. Fish were kept in separate containers per site at all times. Lab based parasite screening of thawed fish involved placing individual fish in 5 or 10 ml cryo-tubes with 1 or 2 ml of distilled water. Following a vigorous shake of 10 s, the liquid was poured into a Petri dish and ectoparasites were identified and counted using a stereomicroscope. Fish were rinsed and checked again for the presence of ectoparasites on skin and fins. The intestines were examined for endoparasites. Before dissection, fish weight (± 0.1 mg) and standard length (± 1 mm) were recorded. To quantify body condition, we calculated the scaled mass index (SMI; Maceda-Veiga et al., 2014; Peig & Green, 2009). Sex was determined during dissection by inspection of gonad development. A total of 668 fish were dissected, which amounts to approximately 20 fish per location, with the exception of seven locations where only 10 fish were screened for the presence of macroparasites. Ecto- and endoparasites were morphologically identified to species level whenever possible.

# Parasite data
data <- read.csv("data_2016_2303.csv", sep=';')
data$site <- as.factor(data$site)

# Calculate parasite parameters
names(data)
##  [1] "site"                "fish"                "parspeciesrichness" 
##  [4] "div_shannon"         "div_simpson"         "temp"               
##  [7] "pH"                  "conductivity"        "nitrogen"           
## [10] "phosphorus"          "oxygen"              "netcen"             
## [13] "updist"              "updist2"             "updist3"            
## [16] "fishspeciesrichness" "weight"              "weigh..g."          
## [19] "length"              "SMI"                 "Sex"                
## [22] "Gyr"                 "Tri"                 "Glu"                
## [25] "ecto_screener"       "Con"                 "CysL"               
## [28] "Pro"                 "Aca"                 "Cam"                
## [31] "Ang"                 "CysI"                "endo_screener"      
## [34] "PI"                  "PI_ecto"             "PI_endo"
#parasite data is overdispersed (mostly so for Trichodina), if using average abundance data, species matrix needs to be transformed
datao <- na.omit(data[,c(1,22:24,26:32)]) #remove fish without parasite counts

library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
ddata <- dispweight(datao[,-1]) #correct for overdispersion of the parasite count data
avab <- aggregate(ddata, by = list(datao[,1]), function(x){mean(x, na.rm =T)})
prev = aggregate(data[,c(22:24,26:32)], by = list(data[,1]), function(x){sum(x >0, na.rm = T)/length(x)})
medin = aggregate(data[,c(22:24,26:32)], by = list(data[,1]), function(x){median(x[x >0], na.rm = T)}) 
pa = aggregate(data[,c(22:24,26:32)], by = list(data[,1]), function(x){ifelse(mean(x, na.rm =T)>0,1,0)}) 

avab[is.na(avab)] <- 0
prev[is.na(prev)] <- 0
medin[is.na(medin)] <- 0

# Host condition data
avcondition <- aggregate(data$SMI, by = list(data[,1]), function(x){mean(x, na.rm =T)})[,2]
avlength <- aggregate(data$length, by = list(data[,1]), function(x){mean(x, na.rm =T)})[,2]

# Parasite index
sgyr <- 1:nrow(data)
stri <- 1:nrow(data)
sglu <- 1:nrow(data)
scon <- 1:nrow(data)
scysl <- 1:nrow(data)
spro <- 1:nrow(data)
saca <- 1:nrow(data)
scam <- 1:nrow(data)
sang <- 1:nrow(data)

for(j in 1:nrow(data)){
  sgyr[j] <- data$Gyr[j]/sd(data$Gyr, na.rm=T)
  stri[j] <- data$Tri[j]/sd(data$Tri, na.rm=T)
  sglu[j] <- data$Glu[j]/sd(data$Glu, na.rm=T)
  scon[j] <- data$Con[j]/sd(data$Con, na.rm=T)
  scysl[j] <- data$CysL[j]/sd(data$CysL, na.rm=T)
  spro[j] <- data$Pro[j]/sd(data$Pro, na.rm=T)
  saca[j] <- data$Aca[j]/sd(data$Aca, na.rm=T)
  scam[j] <- data$Cam[j]/sd(data$Cam, na.rm=T)
  sang[j] <- data$Ang[j]/sd(data$Ang, na.rm=T)
}

PI <- 1:nrow(data)
for(j in 1:nrow(data)){
  PI[j] <- 10/max(sgyr, na.rm=T)*sgyr[j] + 10/max(stri, na.rm=T)*stri[j] + 10/max(sglu, na.rm=T)*sglu[j] +
    10/max(scon, na.rm=T)*scon[j] + 10/max(scysl, na.rm=T)*scysl[j] + 10/max(spro, na.rm=T)*spro[j] +
    10/max(saca, na.rm=T)*saca[j] + 10/max(scam, na.rm=T)*scam[j] + 10/max(sang, na.rm=T)*sang[j]
}

PI_ecto <- 1:nrow(data)
for(j in 1:nrow(data)){
    PI_ecto[j] <- 10/max(sgyr, na.rm=T)*sgyr[j] + 10/max(stri, na.rm=T)*stri[j] + 10/max(sglu, na.rm=T)*sglu[j]
}

PI_endo <- 1:nrow(data)
for(j in 1:nrow(data)){
  PI_endo[j] <-     10/max(scon, na.rm=T)*scon[j] + 10/max(scysl, na.rm=T)*scysl[j] + 10/max(spro, na.rm=T)*spro[j] +
    10/max(saca, na.rm=T)*saca[j] + 10/max(scam, na.rm=T)*scam[j] + 10/max(sang, na.rm=T)*sang[j]
}

avPI <- aggregate(PI, by = list(data[,1]), function(x){mean(x, na.rm =T)})[,2]
avPI_ecto <- aggregate(PI_ecto, by = list(data[,1]), function(x){mean(x, na.rm =T)})[,2]
avPI_endo <- aggregate(PI_endo, by = list(data[,1]), function(x){mean(x, na.rm =T)})[,2]

5 Loading and preparing environmental and spatial data

Physico-chemical data was provided by the Flemish Environmental Agency (VMM). Each fish sampling site was chosen at or near an environmental monitoring site of VMM. Water parameters include water temperature, pH, conductivity, dissolved oxygen (O2), saturation with dissolved oxygen, and Biochemical and Chemical Oxygen Demand (BOD and COD). Nutrient related water parameters include measurements of nitrate (NO3-), nitrite (NO2-), Kjeldahl nitrogen (KjN), total nitrogen (Nt), ammonium (NH4+), and total phosphorus (Pt). Following removal of strong collinear variables (significant correlation with P < 0.05 and Pearson correlation coefficient > |0.6|; Dormann et al., 2013), six environmental physico-chemical variables were retained (temperature, conductivity, COD, saturation with dissolved oxygen, ammonium, and total nitrogen), representing different aspects of water quality. For each water parameter, the average value of the year before sampling was calculated based on monthly monitoring data. Additionally, two hydromorphological variables were included: Tthe presence of a pool-riffle pattern and meanders were noted during field sampling and these parameters were included as binary variables (presence/absence) for a representation of abiotic habitat structure. Spatial (waterway) distances were calculated using the Network Analyst toolbox in ArcGIS. Upstream distance was defined as the maximal upstream distance from the sampling location. Network peripherality was calculated as the average waterway distance of a sampling location to all other locations. Hence, a total of eight environmental and two spatial variables were included in the statistical analysis.

# Environmental data (VMM)
environment <- read.csv("Environment_update.csv", sep=';')
spavar <- read.csv("space2.csv", sep=';') #spatial variables: network centrality and upstream distance
plot(spavar$netcen); plot(density(spavar$netcen))

plot(spavar$updist); plot(density(spavar$updist))

#environmental data (from field observations)
field_data <- read.csv("field_data.csv", sep=',')
environment2 <- cbind(environment[,c(1,49,52:53,55,57,60,63)], field_data[-c(8,10,25,27,37),33:34], spavar[,c(2,3)])
environment2$pool_riffle <- as.factor(environment2$pool_riffle)
environment2$meander <- as.factor(environment2$meander)

netcen <- spavar$netcen
updist <- spavar$updist

We used univariate generalized linear models to investigate how landscape-level effects modify infection patterns of individual parasite taxa, host size and condition. We kept the statistical models linear (as opposed to polynomial) and only considered main effects (i.e. no interaction terms) because we had no prior information from this study system that more complex models were necessary and because the study design with (only) 37 sampling sites was not intended for non-linear models. Ten explanatory variables (temperature, conductivity, COD, saturation with dissolved oxygen, ammonium, total nitrogen, the presence of pool-riffle patterns and meanders, upstream distance, and network peripherality) were included.

6. Bayesian Model Averaging

We used generalized linear models in a Bayesian Model Averaging (BMA) approach to understand how infection with individual parasite taxa relate to host characteristics (host size and condition), environmental conditions as well as spatial distance among sampling sites. Models were run using the R package BAS v1.5.5 (Clyde, 2020). BMA estimates the importance of parameters by averaging the estimates of the various models, each weighted by its model probability and accounts for model uncertainty (Hinne et al., 2020).

library(BAS)

# Make a matrix for PIP (Posterior Inclusion Probability)
PIP <- matrix(nrow=12, ncol=14)
rownames(PIP) <- c("Condition", "Length", "Temperature", "Oxygen saturation", "Conductivity", "COD", "Ammonium", "Total nitrogen", "Pool riffle pattern", "Meander", "Network centrality", "Upstream distance")
colnames(PIP) <- c("Condition", "Length", "Gyrodactylus abundance", "Gyrodactylus prevalence", "Gyrodactylus infection intensity", "Trichodina abundance", "Trichodina prevalence", "Trichodina infection intensity", "Glugea", "Contracaecum", "Aguillicola",
                   "PI", "PI ecto", "PI endo")  

6.1 Variation in host condition

bas.model <- bas.lm(avcondition ~  T_av + O2_sat_av + Con_av + COD_av + NH4._av + Nt_av + 
                      pool_riffle + meander + netcen + updist, 
                    data=environment2, prior="JZS")
plot(bas.model)

summary(bas.model)
##              P(B != 0 | Y) model 1    model 2    model 3    model 4    model 5
## Intercept       1.00000000  1.0000  1.0000000  1.0000000  1.0000000  1.0000000
## T_av            0.12386945  0.0000  1.0000000  0.0000000  0.0000000  0.0000000
## O2_sat_av       0.05073157  0.0000  0.0000000  0.0000000  0.0000000  0.0000000
## Con_av          0.03919858  0.0000  0.0000000  0.0000000  0.0000000  0.0000000
## COD_av          0.05197281  0.0000  0.0000000  0.0000000  0.0000000  1.0000000
## NH4._av         0.04846220  0.0000  0.0000000  0.0000000  0.0000000  0.0000000
## Nt_av           0.05248386  0.0000  0.0000000  0.0000000  0.0000000  0.0000000
## pool_riffle1    0.06729856  0.0000  0.0000000  0.0000000  1.0000000  0.0000000
## meander1        0.06527705  0.0000  0.0000000  0.0000000  0.0000000  0.0000000
## netcen          0.06898689  0.0000  0.0000000  1.0000000  0.0000000  0.0000000
## updist          0.04496936  0.0000  0.0000000  0.0000000  0.0000000  0.0000000
## BF                      NA  1.0000  0.6511099  0.3462387  0.2957714  0.2212089
## PostProbs               NA  0.6912  0.0450000  0.0239000  0.0204000  0.0153000
## R2                      NA  0.0000  0.0906000  0.0565000  0.0478000  0.0315000
## dim                     NA  1.0000  2.0000000  2.0000000  2.0000000  2.0000000
## logmarg                 NA  0.0000 -0.4290769 -1.0606268 -1.2181684 -1.5086476
image(bas.model, rotate=F)

coef.model <- coef(bas.model)
#abs(coef.model$postmean)-2*coef.model$postsd > 0
plot(confint(coef.model, parm = 2:11))
## Warning in arrows(x[not.deg], ci[not.deg, 1], x[not.deg], ci[not.deg, 2], :
## zero-length arrow is of indeterminate angle and so skipped

## NULL
confint <- confint(coef.model, parm = 2:11)
write.table(confint, 'condition.txt', sep="\t")
pip <- summary(bas.model)
PIP[c(3:12),1] <- pip[2:11,1]*sign(coef.model$postmean[2:11])
#coef.model$postmean[2:11]

6.2 Variation in host length

bas.model <- bas.lm(avlength ~ T_av + O2_sat_av + Con_av + COD_av + NH4._av + Nt_av + 
                      pool_riffle + meander + netcen + updist, 
                    data=environment2, prior="JZS")
plot(bas.model)

summary(bas.model)
##              P(B != 0 | Y)   model 1    model 2   model 3  model 4   model 5
## Intercept        1.0000000 1.0000000 1.00000000 1.0000000 1.000000 1.0000000
## T_av             0.1914598 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
## O2_sat_av        0.1407247 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
## Con_av           0.4373524 0.0000000 0.00000000 1.0000000 1.000000 0.0000000
## COD_av           0.1445232 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
## NH4._av          0.2026061 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
## Nt_av            0.1889236 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
## pool_riffle1     0.2259504 0.0000000 0.00000000 0.0000000 0.000000 1.0000000
## meander1         0.3217534 0.0000000 0.00000000 0.0000000 1.000000 0.0000000
## netcen           0.6121912 1.0000000 0.00000000 0.0000000 0.000000 1.0000000
## updist           0.1480063 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
## BF                      NA 0.8278694 0.07277547 0.2894406 1.000000 0.6785609
## PostProbs               NA 0.1582000 0.13910000 0.0553000 0.042500 0.0288000
## R2                      NA 0.2306000 0.00000000 0.1819000 0.314300 0.2982000
## dim                     NA 2.0000000 1.00000000 2.0000000 3.000000 3.0000000
## logmarg                 NA 2.4314765 0.00000000 1.3805712 2.620376 2.2325953
image(bas.model, rotate=F)

coef.model <- coef(bas.model)
#abs(coef.model$postmean)-2*coef.model$postsd > 0
plot(confint(coef.model, parm = 2:11))
## Warning in arrows(x[not.deg], ci[not.deg, 1], x[not.deg], ci[not.deg, 2], :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x[not.deg], ci[not.deg, 1], x[not.deg], ci[not.deg, 2], :
## zero-length arrow is of indeterminate angle and so skipped

## NULL
confint <- confint(coef.model, parm = 2:11)
write.table(confint, 'length.txt', sep="\t")
pip <- summary(bas.model)
PIP[c(3:12),1] <- pip[2:11,1]*sign(coef.model$postmean[2:11])
#coef.model$postmean[2:11]

# Prediction plot
newdata = as.data.frame(cbind(rep(mean(environment2$T_av), 37), 
                rep(mean(environment2$O2_sat_av), 37),
                rep(mean(environment2$Con_av), 37),
                rep(mean(environment2$COD_av), 37),
                rep(mean(environment2$NH4._av), 37),
                rep(mean(environment2$Nt_av), 37),
                rep(1, 37),
                rep(1, 37),
                rep(mean(netcen), 37),
                rep(mean(updist), 37)))
colnames(newdata) <- c("T_av", "O2_sat_av", "Con_av", "COD_av", "NH4._av", "Nt_av", "pool_riffle", "meander", "netcen", "updist")
newdata[,"pool_riffle"] <- as.factor(newdata[,"pool_riffle"]); newdata[,"meander"] <- as.factor(newdata[,"meander"])
newdata1 <- newdata; newdata1[,"netcen"] <- netcen
BMA <- predict(bas.model, newdata = newdata1, estimator = "BMA", se.fit=TRUE)

png(file="figure.png", res=600, width=3000, height=3000)

library(ggplot2)

figure_avlength <- ggplot(environment2, aes(netcen, BMA$fit)) +
  theme_bw() +
  geom_line(color="red", size=1) +
  geom_ribbon(aes(ymin = (BMA$fit-BMA$se.bma.fit), ymax = (BMA$fit+BMA$se.bma.fit)), alpha = .1) +
  geom_point(data = environment2, aes(x=netcen, y=avlength)) +
  labs(x=expression("Network peripherality [m]"), y=expression("Average host length [mm]")) +
  theme(axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12)) +  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
dev.off()
## png 
##   2
figure_avlength

6.3 Variation in Gyrodactylus infection

6.3.1 Mean abundance

bas.model <- bas.lm(avab$Gyr ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
yhat = fitted(bas.model, estimator = "BMA") #these are the fitted values under BMA
r = bas.model$Y - yhat #these are the model residuals
library(car)
## Loading required package: carData
qqPlot(r)

## [1] 10  4
plot(bas.model)

summary(bas.model)
##              P(B != 0 | Y)  model 1    model 2   model 3   model 4   model 5
## Intercept        1.0000000 1.000000 1.00000000 1.0000000 1.0000000 1.0000000
## avlength         0.5500651 1.000000 0.00000000 0.0000000 0.0000000 1.0000000
## avcondition      0.3049274 0.000000 0.00000000 0.0000000 0.0000000 1.0000000
## T_av             0.1620852 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## O2_sat_av        0.2017699 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## Con_av           0.2621665 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## COD_av           0.8086300 1.000000 0.00000000 1.0000000 1.0000000 1.0000000
## NH4._av          0.2417809 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## Nt_av            0.8618345 1.000000 1.00000000 1.0000000 1.0000000 1.0000000
## pool_riffle1     0.1760484 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## meander1         0.1775842 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## netcen           0.3006370 0.000000 0.00000000 1.0000000 0.0000000 0.0000000
## updist           0.1648814 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## BF                      NA 1.000000 0.03902505 0.5452757 0.1537678 0.8271829
## PostProbs               NA 0.072100 0.05160000 0.0393000 0.0369000 0.0265000
## R2                      NA 0.524000 0.28790000 0.5059000 0.4100000 0.5631000
## dim                     NA 4.000000 2.00000000 4.0000000 3.0000000 5.0000000
## logmarg                 NA 6.997337 3.75378552 6.3908733 5.1250253 6.8076077
image(bas.model, rotate=F)

coef.model <- coef(bas.model)
abs(coef.model$postmean)-2*coef.model$postsd > 0
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
confint(coef.model)
##                       2.5%        97.5%          beta
## Intercept     9.757392e-02 2.050174e-01  1.489134e-01
## avlength     -2.510537e-02 0.000000e+00 -8.122675e-03
## avcondition  -9.794814e-01 1.619784e-03 -1.500326e-01
## T_av         -2.075425e-02 1.707043e-02 -1.493132e-04
## O2_sat_av    -9.566862e-04 5.454556e-03  3.794809e-04
## Con_av       -1.034656e-04 4.962797e-04  5.726233e-05
## COD_av        0.000000e+00 1.298434e-02  6.652983e-03
## NH4._av      -6.226355e-02 7.268659e-02 -1.888173e-04
## Nt_av         0.000000e+00 5.247458e-02  2.930918e-02
## pool_riffle1 -5.751625e-02 8.407924e-02  3.966550e-03
## meander1     -8.753434e-02 5.141186e-02 -4.806259e-03
## netcen       -2.449916e-07 1.002058e-05  1.332021e-06
## updist       -1.172786e-06 1.784014e-06  4.618571e-08
## attr(,"Probability")
## [1] 0.95
## attr(,"class")
## [1] "confint.bas"
plot(confint(coef.model, parm = 2:11))

## NULL
confint <- confint(coef.model, parm = 2:11)
write.table(confint, 'GyroAA.txt', sep="\t")
pip <- summary(bas.model)
PIP[c(1:12),3] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

6.3.1.1 Prediction plot for marginal effect of host length on average Gyrodactylus infection

# Prediction plot
newdata = as.data.frame(cbind(rep(mean(avlength), 37),
                rep(mean(avcondition), 37),
                rep(mean(environment2$T_av), 37), 
                rep(mean(environment2$O2_sat_av), 37),
                rep(mean(environment2$Con_av), 37),
                rep(mean(environment2$COD_av), 37),
                rep(mean(environment2$NH4._av), 37),
                rep(mean(environment2$Nt_av), 37),
                rep(1, 37),
                rep(1, 37),
                rep(mean(netcen), 37),
                rep(mean(updist), 37)))
colnames(newdata) <- c("avlength", "avcondition", "T_av", "O2_sat_av", "Con_av", "COD_av", "NH4._av", "Nt_av", "pool_riffle", "meander", "netcen", "updist")
newdata[,"pool_riffle"] <- as.factor(newdata[,"pool_riffle"]); newdata[,"meander"] <- as.factor(newdata[,"meander"])
newdata1 <- newdata; newdata1[,"avlength"] <- avlength
BMA <- predict(bas.model, newdata = newdata1, estimator = "BMA", se.fit=TRUE)
ggplot(environment2, aes(avlength, BMA$fit)) +
  theme_bw() +
  geom_line(color="red", size=1) +
  geom_ribbon(aes(ymin = (BMA$fit-BMA$se.bma.fit), ymax = (BMA$fit+BMA$se.bma.fit)), alpha = .1) +
  geom_point(data = environment2, aes(x=avlength, y=avab$Gyr)) +
  labs(x=expression("Average host length [mm]"), y=expression("Average Gyrodactylus count")) +
  theme(axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12)) +  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

6.3.1.2 Prediction plot for marginal effect of COD on average Gyrodactylus infection

newdata1 <- newdata; newdata1[,"COD_av"] <- environment2$COD_av
BMA <- predict(bas.model, newdata = newdata1, estimator = "BMA", se.fit=TRUE)
ggplot(environment2, aes(COD_av, BMA$fit)) +
  theme_bw() +
  geom_line(color="red", size=1) +
  geom_ribbon(aes(ymin = (BMA$fit-BMA$se.bma.fit), ymax = (BMA$fit+BMA$se.bma.fit)), alpha = .1) +
  geom_point(data = environment2, aes(x=COD_av, y=avab$Gyr)) +
  labs(x=expression("COD"), y=expression("Average Gyrodactylus count")) +
  theme(axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12)) +  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

6.3.2.3 Prediction plot for marginal effect of total nitrogen on average Gyrodactylus infection

newdata1 <- newdata; newdata1[,"Nt_av"] <- environment2$Nt_av
BMA <- predict(bas.model, newdata = newdata1, estimator = "BMA", se.fit=TRUE)
ggplot(environment2, aes(Nt_av, BMA$fit)) +
  theme_bw() +
  geom_line(color="red", size=1) +
  geom_ribbon(aes(ymin = (BMA$fit-BMA$se.bma.fit), ymax = (BMA$fit+BMA$se.bma.fit)), alpha = .1) +
  geom_point(data = environment2, aes(x=Nt_av, y=avab$Gyr)) +
  labs(x=expression("Nt"), y=expression("Average Gyrodactylus count")) +
  theme(axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12)) +  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

6.3.2 Median infection intensity

bas.model <- bas.lm(medin$Gyr ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
yhat = fitted(bas.model, estimator = "BMA") #these are the fitted values under BMA
r = bas.model$Y - yhat #these are the model residuals
library(car)
plot(bas.model)

summary(bas.model)
##              P(B != 0 | Y) model 1    model 2   model 3    model 4    model 5
## Intercept       1.00000000  1.0000  1.0000000  1.000000  1.0000000  1.0000000
## avlength        0.02065469  0.0000  0.0000000  0.000000  0.0000000  0.0000000
## avcondition     0.04286905  0.0000  1.0000000  0.000000  0.0000000  0.0000000
## T_av            0.02991678  0.0000  0.0000000  0.000000  0.0000000  0.0000000
## O2_sat_av       0.02400544  0.0000  0.0000000  0.000000  0.0000000  0.0000000
## Con_av          0.02143314  0.0000  0.0000000  0.000000  0.0000000  0.0000000
## COD_av          0.03655302  0.0000  0.0000000  0.000000  1.0000000  0.0000000
## NH4._av         0.03250611  0.0000  0.0000000  0.000000  0.0000000  1.0000000
## Nt_av           0.04088810  0.0000  0.0000000  1.000000  0.0000000  0.0000000
## pool_riffle1    0.02307275  0.0000  0.0000000  0.000000  0.0000000  0.0000000
## meander1        0.02038443  0.0000  0.0000000  0.000000  0.0000000  0.0000000
## netcen          0.02173649  0.0000  0.0000000  0.000000  0.0000000  0.0000000
## updist          0.02738794  0.0000  0.0000000  0.000000  0.0000000  0.0000000
## BF                      NA  1.0000  0.3176354  0.302852  0.2488803  0.2411373
## PostProbs               NA  0.7775  0.0206000  0.019600  0.0161000  0.0156000
## R2                      NA  0.0000  0.0517000  0.049100  0.0381000  0.0363000
## dim                     NA  1.0000  2.0000000  2.000000  2.0000000  2.0000000
## logmarg                 NA  0.0000 -1.1468512 -1.194511 -1.3907833 -1.4223890
image(bas.model, rotate=F)

coef.model <- coef(bas.model)
abs(coef.model$postmean)-2*coef.model$postsd > 0
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
confint(coef.model)
##                  2.5%   97.5%          beta
## Intercept    1.543068 3.20706  2.405405e+00
## avlength     0.000000 0.00000 -5.938396e-04
## avcondition  0.000000 0.00000 -2.939823e-01
## T_av         0.000000 0.00000 -7.368692e-03
## O2_sat_av    0.000000 0.00000 -4.581964e-04
## Con_av       0.000000 0.00000 -1.594570e-05
## COD_av       0.000000 0.00000  1.609035e-03
## NH4._av      0.000000 0.00000  1.027436e-02
## Nt_av        0.000000 0.00000  7.570653e-03
## pool_riffle1 0.000000 0.00000  1.051497e-02
## meander1     0.000000 0.00000  2.511911e-03
## netcen       0.000000 0.00000  3.990331e-07
## updist       0.000000 0.00000  4.560731e-07
## attr(,"Probability")
## [1] 0.95
## attr(,"class")
## [1] "confint.bas"
plot(confint(coef.model, parm = 2:11))

## NULL
confint <- confint(coef.model, parm = 2:11)
write.table(confint, 'GyroAA.txt', sep="\t")
pip <- summary(bas.model)
PIP[c(1:12),3] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

6.3.2 Median infection intensity

bas.model <- bas.lm(prev$Gyr ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
yhat = fitted(bas.model, estimator = "BMA") #these are the fitted values under BMA
r = bas.model$Y - yhat #these are the model residuals
library(car)
plot(bas.model)

summary(bas.model)
##              P(B != 0 | Y)  model 1    model 2   model 3   model 4     model 5
## Intercept       1.00000000 1.000000 1.00000000 1.0000000 1.0000000  1.00000000
## avlength        0.16685753 0.000000 0.00000000 0.0000000 1.0000000  0.00000000
## avcondition     0.07728991 0.000000 0.00000000 0.0000000 0.0000000  0.00000000
## T_av            0.07242807 0.000000 0.00000000 0.0000000 0.0000000  0.00000000
## O2_sat_av       0.09667222 0.000000 0.00000000 0.0000000 0.0000000  0.00000000
## Con_av          0.58076129 1.000000 0.00000000 1.0000000 0.0000000  0.00000000
## COD_av          0.17268847 0.000000 0.00000000 1.0000000 0.0000000  0.00000000
## NH4._av         0.09134534 0.000000 0.00000000 0.0000000 0.0000000  0.00000000
## Nt_av           0.11056039 0.000000 0.00000000 0.0000000 0.0000000  0.00000000
## pool_riffle1    0.13922799 0.000000 0.00000000 0.0000000 0.0000000  0.00000000
## meander1        0.09905733 0.000000 0.00000000 0.0000000 0.0000000  0.00000000
## netcen          0.11864979 0.000000 0.00000000 0.0000000 0.0000000  1.00000000
## updist          0.06837623 0.000000 0.00000000 0.0000000 0.0000000  0.00000000
## BF                      NA 1.000000 0.08283414 0.8857497 0.1246735  0.07409002
## PostProbs               NA 0.228800 0.22750000 0.0369000 0.0285000  0.01700000
## R2                      NA 0.233300 0.00000000 0.3039000 0.1341000  0.10730000
## dim                     NA 2.000000 1.00000000 3.0000000 2.0000000  2.00000000
## logmarg                 NA 2.490915 0.00000000 2.3695941 0.4088580 -0.11155941
image(bas.model, rotate=F)

coef.model <- coef(bas.model)
abs(coef.model$postmean)-2*coef.model$postsd > 0
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
confint(coef.model)
##                       2.5%        97.5%          beta
## Intercept     2.515149e-01 3.952911e-01  3.224082e-01
## avlength     -2.293457e-02 3.515571e-05 -2.431148e-03
## avcondition  -4.697134e-01 2.698158e-03 -1.986363e-02
## T_av         -1.793383e-02 2.823966e-03 -7.123130e-04
## O2_sat_av    -4.098584e-03 0.000000e+00 -2.662986e-04
## Con_av        0.000000e+00 8.082879e-04  3.112530e-04
## COD_av        0.000000e+00 8.261193e-03  9.376253e-04
## NH4._av       0.000000e+00 4.189605e-02  1.064409e-03
## Nt_av        -6.691811e-05 2.526203e-02  1.612509e-03
## pool_riffle1 -5.751083e-04 1.573151e-01  1.465705e-02
## meander1     -1.078613e-01 0.000000e+00 -6.966751e-03
## netcen       -3.291983e-09 8.539728e-06  5.765326e-07
## updist       -5.258135e-07 5.412850e-07 -6.922933e-09
## attr(,"Probability")
## [1] 0.95
## attr(,"class")
## [1] "confint.bas"
plot(confint(coef.model, parm = 2:11))

## NULL
confint <- confint(coef.model, parm = 2:11)
write.table(confint, 'GyroAA.txt', sep="\t")
pip <- summary(bas.model)
PIP[c(1:12),3] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

6.3.2.3 Prediction plot for marginal effect of total nitrogen on average Gyrodactylus infection

newdata1 <- newdata; newdata1[,"Con_av"] <- environment2$Con_av
BMA <- predict(bas.model, newdata = newdata1, estimator = "BMA", se.fit=TRUE)
ggplot(environment2, aes(Con_av, BMA$fit)) +
  theme_bw() +
  geom_line(color="red", size=1) +
  geom_ribbon(aes(ymin = (BMA$fit-BMA$se.bma.fit), ymax = (BMA$fit+BMA$se.bma.fit)), alpha = .1) +
  geom_point(data = environment2, aes(x=Con_av, y=prev$Gyr)) +
  labs(x=expression("Conductivity"), y=expression("Gyrodactylus prevalence")) +
  theme(axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12)) +  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

6.4 Variation in Trichodina infection

6.4.1 Mean abundance

bas.model <- bas.lm(avab$Tri ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
yhat = fitted(bas.model, estimator = "BMA") #these are the fitted values under BMA
r = bas.model$Y - yhat #these are the model residuals
plot(bas.model)

summary(bas.model)
##              P(B != 0 | Y)   model 1  model 2  model 3   model 4   model 5
## Intercept       1.00000000 1.0000000 1.000000 1.000000 1.0000000 1.0000000
## avlength        0.12444042 0.0000000 0.000000 0.000000 0.0000000 0.0000000
## avcondition     0.09559152 0.0000000 0.000000 0.000000 0.0000000 0.0000000
## T_av            0.08500185 0.0000000 0.000000 0.000000 0.0000000 0.0000000
## O2_sat_av       0.09751169 0.0000000 0.000000 0.000000 0.0000000 0.0000000
## Con_av          0.42180029 0.0000000 1.000000 0.000000 0.0000000 0.0000000
## COD_av          0.46724539 0.0000000 1.000000 0.000000 0.0000000 1.0000000
## NH4._av         0.21465122 0.0000000 0.000000 0.000000 1.0000000 0.0000000
## Nt_av           0.34576751 0.0000000 0.000000 1.000000 0.0000000 1.0000000
## pool_riffle1    0.13743699 0.0000000 0.000000 0.000000 0.0000000 0.0000000
## meander1        0.08968689 0.0000000 0.000000 0.000000 0.0000000 0.0000000
## netcen          0.10313787 0.0000000 0.000000 0.000000 0.0000000 0.0000000
## updist          0.08604777 0.0000000 0.000000 0.000000 0.0000000 0.0000000
## BF                      NA 0.0239404 1.000000 0.170643 0.1105942 0.2711811
## PostProbs               NA 0.1650000 0.104400 0.098000 0.0635000 0.0283000
## R2                      NA 0.0000000 0.358500 0.209300 0.1890000 0.3063000
## dim                     NA 1.0000000 3.000000 2.000000 2.0000000 3.0000000
## logmarg                 NA 0.0000000 3.732188 1.964006 1.5303004 2.4272192
image(bas.model, rotate=F)

coef.model <- coef(bas.model)
abs(coef.model$postmean)-2*coef.model$postsd > 0
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
confint(coef.model)
##                       2.5%        97.5%          beta
## Intercept     1.236838e-01 2.713064e-01  2.002413e-01
## avlength     -1.753421e-02 8.041130e-06 -1.114390e-03
## avcondition  -4.976994e-01 1.059556e-01 -2.394139e-02
## T_av         -1.685237e-02 8.151576e-03 -1.479738e-04
## O2_sat_av    -1.565878e-03 3.458497e-03  1.132458e-04
## Con_av        0.000000e+00 7.773794e-04  2.109076e-04
## COD_av        0.000000e+00 1.383223e-02  4.203136e-03
## NH4._av      -7.004716e-05 1.063660e-01  1.096330e-02
## Nt_av        -6.776110e-06 4.932998e-02  1.044647e-02
## pool_riffle1  0.000000e+00 1.495831e-01  1.212839e-02
## meander1     -5.049001e-02 5.002283e-02 -1.678368e-03
## netcen       -6.688481e-06 5.486238e-07 -2.846749e-07
## updist       -7.668974e-07 1.377327e-06 -1.915234e-08
## attr(,"Probability")
## [1] 0.95
## attr(,"class")
## [1] "confint.bas"
plot(confint(coef.model, parm = 2:11))

## NULL
confint <- confint(coef.model, parm = 2:11)
write.table(confint, 'GyroAA.txt', sep="\t")
pip <- summary(bas.model)
PIP[c(1:12),3] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

6.4.1 Median infection intensity

bas.model <- bas.lm(medin$Tri ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
yhat = fitted(bas.model, estimator = "BMA") #these are the fitted values under BMA
r = bas.model$Y - yhat #these are the model residuals
plot(bas.model)

summary(bas.model)
##              P(B != 0 | Y)  model 1    model 2   model 3   model 4   model 5
## Intercept        1.0000000 1.000000 1.00000000 1.0000000 1.0000000 1.0000000
## avlength         0.2881774 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## avcondition      0.2224163 0.000000 0.00000000 0.0000000 1.0000000 0.0000000
## T_av             0.1574615 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## O2_sat_av        0.1621652 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## Con_av           0.7912882 1.000000 0.00000000 1.0000000 1.0000000 1.0000000
## COD_av           0.7219333 1.000000 0.00000000 0.0000000 1.0000000 1.0000000
## NH4._av          0.3248706 0.000000 1.00000000 1.0000000 0.0000000 0.0000000
## Nt_av            0.3218716 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## pool_riffle1     0.2081402 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## meander1         0.2655250 0.000000 0.00000000 0.0000000 0.0000000 1.0000000
## netcen           0.2107764 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## updist           0.1536937 0.000000 0.00000000 0.0000000 0.0000000 0.0000000
## BF                      NA 1.000000 0.04957628 0.2279222 0.5092549 0.4837322
## PostProbs               NA 0.148800 0.04060000 0.0339000 0.0227000 0.0216000
## R2                      NA 0.449900 0.26810000 0.3987000 0.4816000 0.4799000
## dim                     NA 3.000000 2.00000000 3.0000000 4.0000000 4.0000000
## logmarg                 NA 6.288599 3.28435583 4.8098478 5.6137920 5.5623749
image(bas.model, rotate=F)

coef.model <- coef(bas.model)
abs(coef.model$postmean)-2*coef.model$postsd > 0
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
confint(coef.model)
##                       2.5%        97.5%          beta
## Intercept     1.687000e+01 3.424741e+01  2.547297e+01
## avlength     -3.633816e+00 3.722863e-03 -5.158998e-01
## avcondition  -1.370594e+02 9.319089e+00 -1.375058e+01
## T_av         -2.145952e+00 4.298074e+00  2.065105e-01
## O2_sat_av    -5.220756e-01 4.660539e-01 -1.498026e-02
## Con_av        0.000000e+00 1.279882e-01  6.585727e-02
## COD_av       -2.513913e-04 1.985159e+00  9.195418e-01
## NH4._av      -2.705385e+00 1.560839e+01  2.222211e+00
## Nt_av        -8.523325e-02 6.411363e+00  1.012166e+00
## pool_riffle1 -3.153194e+00 2.344610e+01  2.247962e+00
## meander1     -2.618250e+01 1.026793e+00 -3.523477e+00
## netcen       -1.483218e-03 1.375942e-04 -1.366046e-04
## updist       -2.811326e-04 2.389653e-04  5.653370e-06
## attr(,"Probability")
## [1] 0.95
## attr(,"class")
## [1] "confint.bas"
plot(confint(coef.model, parm = 2:11))

## NULL
confint <- confint(coef.model, parm = 2:11)
write.table(confint, 'GyroAA.txt', sep="\t")
pip <- summary(bas.model)
PIP[c(1:12),3] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

6.3.2.3 Prediction plot for marginal effect of conductivity on median infection intensity with Trichodina

newdata1 <- newdata; newdata1[,"Con_av"] <- environment2$Con_av
BMA <- predict(bas.model, newdata = newdata1, estimator = "BMA", se.fit=TRUE)
ggplot(environment2, aes(Con_av, BMA$fit)) +
  theme_bw() +
  geom_line(color="red", size=1) +
  geom_ribbon(aes(ymin = (BMA$fit-BMA$se.bma.fit), ymax = (BMA$fit+BMA$se.bma.fit)), alpha = .1) +
  geom_point(data = environment2, aes(x=Con_av, y=medin$Tri)) +
  labs(x=expression("Conductivity"), y=expression("Trichodina median infection intensity")) +
  theme(axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12)) +  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

6.3.2.3 Prediction plot for marginal effect of COD on median infection intensity with Trichodina

newdata1 <- newdata; newdata1[,"COD_av"] <- environment2$COD_av
BMA <- predict(bas.model, newdata = newdata1, estimator = "BMA", se.fit=TRUE)
ggplot(environment2, aes(COD_av, BMA$fit)) +
  theme_bw() +
  geom_line(color="red", size=1) +
  geom_ribbon(aes(ymin = (BMA$fit-BMA$se.bma.fit), ymax = (BMA$fit+BMA$se.bma.fit)), alpha = .1) +
  geom_point(data = environment2, aes(x=COD_av, y=medin$Tri)) +
  labs(x=expression("Conductivity"), y=expression("Trichodina median infection intensity")) +
  theme(axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12)) +  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

6.4.1 Prevalence

bas.model <- bas.lm(prev$Tri ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
yhat = fitted(bas.model, estimator = "BMA") #these are the fitted values under BMA
r = bas.model$Y - yhat #these are the model residuals
plot(bas.model)

summary(bas.model)
##              P(B != 0 | Y)   model 1   model 2    model 3   model 4   model 5
## Intercept       1.00000000 1.0000000 1.0000000  1.0000000 1.0000000  1.000000
## avlength        0.04005815 0.0000000 0.0000000  0.0000000 0.0000000  0.000000
## avcondition     0.03389439 0.0000000 0.0000000  0.0000000 0.0000000  0.000000
## T_av            0.03654853 0.0000000 0.0000000  0.0000000 0.0000000  0.000000
## O2_sat_av       0.03749123 0.0000000 0.0000000  0.0000000 0.0000000  0.000000
## Con_av          0.20573471 0.0000000 1.0000000  0.0000000 1.0000000  0.000000
## COD_av          0.04629842 0.0000000 0.0000000  0.0000000 0.0000000  0.000000
## NH4._av         0.06690739 0.0000000 0.0000000  1.0000000 0.0000000  0.000000
## Nt_av           0.04604656 0.0000000 0.0000000  0.0000000 0.0000000  1.000000
## pool_riffle1    0.03411173 0.0000000 0.0000000  0.0000000 0.0000000  0.000000
## meander1        0.03657952 0.0000000 0.0000000  0.0000000 0.0000000  0.000000
## netcen          0.07467299 0.0000000 0.0000000  0.0000000 1.0000000  0.000000
## updist          0.07352673 0.0000000 0.0000000  0.0000000 0.0000000  0.000000
## BF                      NA 0.5616705 0.7274201  0.2732671 1.0000000  0.155205
## PostProbs               NA 0.6354000 0.0686000  0.0258000 0.0171000  0.014600
## R2                      NA 0.0000000 0.1264000  0.0750000 0.2249000  0.044000
## dim                     NA 1.0000000 2.0000000  2.0000000 3.0000000  2.000000
## logmarg                 NA 0.0000000 0.2585887 -0.7204656 0.5768398 -1.286169
image(bas.model, rotate=F)

coef.model <- coef(bas.model)
abs(coef.model$postmean)-2*coef.model$postsd > 0
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
confint(coef.model)
##                       2.5%        97.5%          beta
## Intercept     7.810986e-01 8.852866e-01  8.340580e-01
## avlength      0.000000e+00 0.000000e+00 -1.453558e-04
## avcondition   0.000000e+00 0.000000e+00 -2.610917e-03
## T_av          0.000000e+00 0.000000e+00 -1.135202e-05
## O2_sat_av     0.000000e+00 0.000000e+00 -1.893466e-05
## Con_av        0.000000e+00 4.227631e-04  6.570599e-05
## COD_av        0.000000e+00 0.000000e+00  9.451636e-05
## NH4._av       0.000000e+00 2.099804e-02  1.713706e-03
## Nt_av         0.000000e+00 0.000000e+00  1.739937e-04
## pool_riffle1  0.000000e+00 0.000000e+00  2.104077e-04
## meander1      0.000000e+00 0.000000e+00 -6.652944e-04
## netcen       -3.765093e-06 0.000000e+00 -3.317353e-07
## updist       -1.392896e-06 2.501169e-09 -1.309710e-07
## attr(,"Probability")
## [1] 0.95
## attr(,"class")
## [1] "confint.bas"
plot(confint(coef.model, parm = 2:11))

## NULL
confint <- confint(coef.model, parm = 2:11)
write.table(confint, 'GyroAA.txt', sep="\t")
pip <- summary(bas.model)
PIP[c(1:12),3] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

6.5 Figure

PIP <- matrix(nrow=12, ncol=14)
rownames(PIP) <- c("Condition", "Length", "Temperature", "Oxygen saturation", "Conductivity", "COD", "Ammonium", "Total nitrogen", "Pool riffle pattern", "Meander", "Network centrality", "Upstream distance")
colnames(PIP) <- c("Condition", "Length", "Gyrodactylus abundance", "Gyrodactylus prevalence", "Gyrodactylus infection intensity", "Trichodina abundance", "Trichodina prevalence", "Trichodina infection intensity", "Glugea", "Contracaecum", "Aguillicola",
                   "PI", "PI ecto", "PI endo")  

#Condition
bas.model <- bas.lm(avcondition ~ T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(3:12),1] <- pip[2:11,1]*sign(coef.model$postmean[2:11])

#Length
bas.model <- bas.lm(avlength ~ T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(3:12),2] <- pip[2:11,1]*sign(coef.model$postmean[2:11])

#Gyrodactylus abundance
bas.model <- bas.lm(avab$Gyr ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),3] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#Gyrodactylus prevalence
bas.model <- bas.lm(prev$Gyr ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),4] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#Gyrodactylus infection intensity
bas.model <- bas.lm(medin$Gyr ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),5] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#Trichodina abundance
bas.model <- bas.lm(avab$Tri ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),6] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#Trichodina prevalence
bas.model <- bas.lm(prev$Tri ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),7] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#Trichodina infection intensity
bas.model <- bas.lm(medin$Tri ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),8] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#Glugea
bas.model <- bas.glm(pa$Glu ~ avlength + avcondition + T_av + O2_sat_av + Con_av^2 + COD_av + 
                       log(NH4._av) + log(Nt_av) + log(SM_av) + pool_riffle + meander + 
                       spavar$netcen + spavar$updist, data=environment2, betaprior=g.prior(100), family=binomial)
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),9] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#Contracaecum
bas.model <- bas.glm(pa$Con ~ avlength + avcondition + T_av + O2_sat_av + Con_av^2 + COD_av + 
                       log(NH4._av) + log(Nt_av) + log(SM_av) + pool_riffle + meander + 
                       spavar$netcen + spavar$updist, data=environment2, betaprior=g.prior(100), family=binomial)
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),10] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#Anguillicola
bas.model <- bas.glm(pa$Ang ~ avlength + avcondition + T_av + O2_sat_av + Con_av^2 + COD_av + 
                       log(NH4._av) + log(Nt_av) + log(SM_av) + pool_riffle + meander + 
                       spavar$netcen + spavar$updist, data=environment2, betaprior=g.prior(100), family=binomial)
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),11] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#PI
bas.model <- bas.lm(avPI ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),12] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#PI ecto
bas.model <- bas.lm(avPI_ecto ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),13] <- pip[2:13,1]*sign(coef.model$postmean[2:13])

#PI endo
bas.model <- bas.lm(avPI_endo ~ avlength + avcondition + T_av + O2_sat_av + Con_av + COD_av 
                     + NH4._av + Nt_av + pool_riffle + meander + netcen + 
                       updist, data=environment2, prior="JZS")
coef.model <- coef(bas.model)
pip <- summary(bas.model)
PIP[c(1:12),14] <- pip[2:13,1]*sign(coef.model$postmean[2:13])
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
x = round(PIP, digits=2)
x[abs(PIP)<0.5] <- ""
x[abs(PIP)>0.5] <- "+"
heatmap.2(PIP,
          cellnote = x,
          #main = "Correlation",
          notecex=1,
          notecol="white",
          density.info="none",
          trace="none",
          margins =c(10,6),
          col=redblue(256),
          dendrogram="both",
          cexRow = 0.7,
          cexCol = 0.7,
          key.title = "PIP",
          lhei = c(1,3),
          lwid = c(0.5, 0.5),
          #Colv="NA"
          ) 

7. BORAL analysis

Model-based analysis of multivariate abundance data using Bayesian Markov chain Monte Carlo methods for parameter estimation

7.1 BORAL analysis for average abundances of parasites

library(boral)
## Loading required package: coda
## This is boral version 2.0. If you recently updated boral, please check news(package = "boral") for the updates in the latest version.
data$Site <- as.factor(data$site)
levels(data$site) <- levels(as.factor(environment2$Site))
data_m <- merge(data, environment2, by = "Site")
data_all <- na.omit(data_m) 
names(data_all)
##  [1] "Site"                "site"                "fish"               
##  [4] "parspeciesrichness"  "div_shannon"         "div_simpson"        
##  [7] "temp"                "pH"                  "conductivity"       
## [10] "nitrogen"            "phosphorus"          "oxygen"             
## [13] "netcen.x"            "updist.x"            "updist2"            
## [16] "updist3"             "fishspeciesrichness" "weight"             
## [19] "weigh..g."           "length"              "SMI"                
## [22] "Sex"                 "Gyr"                 "Tri"                
## [25] "Glu"                 "ecto_screener"       "Con"                
## [28] "CysL"                "Pro"                 "Aca"                
## [31] "Cam"                 "Ang"                 "CysI"               
## [34] "endo_screener"       "PI"                  "PI_ecto"            
## [37] "PI_endo"             "T_av"                "O2_sat_av"          
## [40] "Con_av"              "COD_av"              "NH4._av"            
## [43] "Nt_av"               "SM_av"               "pool_riffle"        
## [46] "meander"             "updist.y"            "netcen.y"
avcondition <- aggregate(data$SMI, by = list(data[,1]), function(x){mean(x, na.rm =T)})[,2]
avlength <- aggregate(data$length, by = list(data[,1]), function(x){mean(x, na.rm =T)})[,2]

y <- round(cbind(avab$Gyr, avab$Tri, avab$Glu, avab$Con, avab$Ang))
X <- cbind(avcondition,
           avlength,
           environment2$T_av,
           environment2$O2_sat_av,
           environment2$Con_av,
           environment2$COD_av, 
           environment2$NH4._av, 
           environment2$Nt_av, 
           environment2$netcen, 
           environment2$updist, 
           as.numeric(environment2$pool_riffle), 
           as.numeric(environment2$meander))
colnames(X) <- c("avcondition", "avlength", "T", "O2", "Con", "COD", "NH4", "Nt", "netcen", "updist", "pool_riffle", "meander")

example_mcmc_control <- list(n.burnin = 1000, n.iteration = 10000, n.thin = 1)
testpath <- file.path(tempdir(), "jagsboralmodel.txt")
paramod <- boral(y, X = X,
                      family = "negative.binomial",
                      mcmc.control = example_mcmc_control,
                      model.name = testpath,
                      lv.control = list(num.lv = 2, type = "independent"),
                      save.model = TRUE)
## module glm loaded
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 185
##    Unobserved stochastic nodes: 338
##    Total graph size: 2173
## 
## Initializing model
plot(paramod)
## NULL

coefsplot(covname = "avcondition", object = paramod) #Condition

coefsplot(covname = "avlength", object = paramod) #Length

coefsplot(covname = "T", object = paramod) #Temperature

coefsplot(covname = "O2", object = paramod) #Oxygen

coefsplot(covname = "Con", object = paramod) #Conductivity

coefsplot(covname = "COD", object = paramod) #COD

coefsplot(covname = "NH4", object = paramod) #NH4

coefsplot(covname = "Nt", object = paramod) #Nt

coefsplot(covname = "netcen", object = paramod) #netcen

coefsplot(covname = "updist", object = paramod) #updist

coefsplot(covname = "pool_riffle", object = paramod) #poolriffle

coefsplot(covname = "meander", object = paramod) #meander

envcors <- get.enviro.cor(paramod)
rescors <- get.residual.cor(paramod)
library(corrplot)
## corrplot 0.92 loaded
corrplot(envcors$sig.cor, type = "lower", diag = FALSE, title = "Correlations due to covariates", mar = c(3,0.5,2,1), tl.srt = 45)

corrplot(rescors$sig.cor, type = "lower", diag = FALSE, title = "Residual correlations", mar = c(3,0.5,2,1), tl.srt = 45)

7.1 BORAL analysis for median infection intensities of parasites

y <- round(cbind(medin$Gyr, medin$Tri, medin$Glu, medin$Con, medin$Ang))
paramod <- boral(y, X = X,
                      family = "negative.binomial",
                      mcmc.control = example_mcmc_control,
                      model.name = testpath,
                      lv.control = list(num.lv = 2, type = "independent"),
                      save.model = TRUE)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 185
##    Unobserved stochastic nodes: 338
##    Total graph size: 2173
## 
## Initializing model
plot(paramod)
## NULL

coefsplot(covname = "avcondition", object = paramod) #Condition

coefsplot(covname = "avlength", object = paramod) #Length

coefsplot(covname = "T", object = paramod) #Temperature

coefsplot(covname = "O2", object = paramod) #Oxygen

coefsplot(covname = "Con", object = paramod) #Conductivity

coefsplot(covname = "COD", object = paramod) #COD

coefsplot(covname = "NH4", object = paramod) #NH4

coefsplot(covname = "Nt", object = paramod) #Nt

coefsplot(covname = "netcen", object = paramod) #netcen

coefsplot(covname = "updist", object = paramod) #updist

coefsplot(covname = "pool_riffle", object = paramod) #poolriffle

coefsplot(covname = "meander", object = paramod) #meander

envcors <- get.enviro.cor(paramod)
rescors <- get.residual.cor(paramod)
library(corrplot)
corrplot(envcors$sig.cor, type = "lower", diag = FALSE, title = "Correlations due to covariates", mar = c(3,0.5,2,1), tl.srt = 45)

corrplot(rescors$sig.cor, type = "lower", diag = FALSE, title = "Residual correlations", mar = c(3,0.5,2,1), tl.srt = 45)